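/**** This do-file randomizes grade 12 students within treated schools.
Inputs: 
	$randomization/randomization_school_TVET_list.xlsx (school-level treatment list)
	$clean/grade1112_baseline_reachable_wgrade.dta (student baseline data)
*/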

* Build the pool of candidate seeds: 1000 pseudo-random integers in
* [0, 999999], drawn under a fixed master seed so the pool itself is
* reproducible. levelsof returns the distinct values in ascending order, so
* the pool can hold fewer than 1000 seeds if a value is drawn twice.
drop _all
set seed 142857
set obs 1000
generate value = int(runiform()*10^6)
levelsof value, local(seedpool)
global randomseed "`seedpool'"

********************************************************************************
*
*						GRADE 12 STUDENTS 
*
********************************************************************************
* --- School-level assignment: read the sheet and keep treated schools only ---
import excel using "$randomization/randomization_school_TVET_list.xlsx", sheet("Sheet1") firstrow clear

* Whatever the treatment column is called (anything matching treat*),
* rename it to "treat". This fails if more than one column matches.
foreach treatcol of varlist treat* {
	rename `treatcol' treat
}
keep if treat == 1
keep schoolname treat

tempfile TVET_schools
save `TVET_schools', replace

* --- Student-level baseline data ---
use "$clean/grade1112_baseline_reachable_wgrade.dta", clear

* Sample restriction: grade 12 students whose stream is not coded 99
keep if grade == 12 & stream != 99

* Restrict to students in treated schools (matched records only)
merge m:1 schoolname using `TVET_schools'
drop if _merge != 3
drop _merge

* Mentor allocation. The number of treated students in a school depends on
* the number of mentors given to that school, and mentors are shared out
* within each region in proportion to the school's grade 12 enrolment.
*
* Mentors available per region:
*   tti_ks        40 + 21
*   tti_thimphu   26 + 23
*   tti_tyangste  30

* grade 12 head-count per school
bysort schoolname: egen grade12size = total(grade == 12)

scalar nmentor_tti_ks       = 40 + 21
scalar nmentor_tti_thimphu  = 26 + 23
scalar nmentor_tti_tyangste = 30

generate nmentor = .
foreach region in tti_ks tti_thimphu tti_tyangste {
	* grade 12 head-count over all schools flagged for this region
	egen totalgrade12_`region' = total(grade == 12 & `region' == 1)

	* proportional share, truncated to an integer; every school gets at least one mentor
	replace nmentor = int(scalar(nmentor_`region') * (grade12size / totalgrade12_`region')) if `region' == 1
	replace nmentor = 1 if nmentor == 0

	drop totalgrade12_`region'
}

* Regional totals of allocated mentors: count each school once (its first
* observation) and sum within region. Truncation above can leave a region
* below its quota, which the manual top-ups below correct.
bysort schoolname: generate temp = nmentor if _n == 1
foreach region in tti_ks tti_thimphu tti_tyangste {
	egen totmentors_`region' = total(temp) if `region' == 1
}
egen tmentor_region = rowmax(totmentors_tti_ks totmentors_tti_thimphu totmentors_tti_tyangste)
drop totmentors_* temp

* adjust the number of mentors based on geographical variable
tabulate tmentor_region

* tti_tyangste: 1 extra mentor to hand out
tabulate schoolname nmentor if tti_tyangste == 1
replace nmentor = nmentor + 1 if regexm(schoolname, "jigme")

* tti_thimphu: 5 extra mentors to hand out
tabulate schoolname nmentor if tti_thimphu == 1
replace nmentor = nmentor + 1 if regexm(schoolname, "shaba|dechencholing|rigzom|rinchen|shari")

* tti_ks: 1 extra mentor to hand out
tabulate schoolname nmentor if tti_ks == 1
replace nmentor = nmentor + 1 if regexm(schoolname, "damphu")

* Final check: recompute the regional totals after the top-ups
drop tmentor_region
bysort schoolname: generate temp = nmentor if _n == 1
foreach region in tti_ks tti_thimphu tti_tyangste {
	egen totmentors_`region' = total(temp) if `region' == 1
}
egen tmentor_region = rowmax(totmentors_tti_ks totmentors_tti_thimphu totmentors_tti_tyangste)
drop totmentors_* temp

tabulate tmentor_region // ok

* Write a one-row-per-school summary of the mentor allocation to Excel.
* The student-level data in memory are left untouched (preserve/restore).
preserve
keep schoolname nmentor grade12size tti_ks tti_thimphu tti_tyangste tmentor_region
duplicates drop schoolname, force
sort tti_ks tti_thimphu tti_tyangste
order schoolname nmentor grade12size tti_ks tti_thimphu tti_tyangste tmentor_region

export excel using "$randomization/randomization_TVET_list.xlsx", sheetreplace firstrow(variables)

restore


****** Baseline variables
* These covariates are used only for the balance checks further down. The
* i_* variables are copies of the raw survey items with code 99 set to missing.
* NOTE: variables are created in a fixed order on purpose. Later code drops
* the range responseid-relatives_tti, so relatives_tti must stay the last
* variable created in this section.

* academic stream dummies
g stream_art = stream==1
g stream_com = stream==2
g stream_sci = stream==3

* wealth index: score of the first principal component of items x11a-x11i
foreach i in x11a x11b x11c x11d x11e x11f x11g x11h x11i {
	g i_`i'=`i'
	recode i_`i' (99=.)
}
pca i_x11a i_x11b i_x11c i_x11d i_x11e i_x11f i_x11g i_x11h i_x11i
predict wealth_index, score

* age (reference year is hard-coded to 2021)
g age = 2021-b_year

* academic performance: math, dzongkha, english
foreach i in c1a c1b c1c {
	g i_`i' =`i'
	recode i_`i' (99=.)
}

* knowledge about TVET
g i_f8b = f8b
recode i_f8b (99=.)

* ranking of TVET (f9g is converted to numeric; code 99 is not recoded here)
destring(f9g), g(i_f9g)

* bias about blue-collar jobs
g i_d4d = d4d
recode i_d4d (99=.)

* big 5: sums of three items each, with (5 - item) for reverse-scored items
foreach i of varlist p1* {
	g i_`i' = `i'
	recode i_`i' (99=.)
}
g extroversion = (5-i_p1a) + i_p1c + i_p1i
g conscientiousness = (5-i_p1b) + i_p1d + i_p1o
g openness =  i_p1f + i_p1j + i_p1k
g agreeableness = (5-i_p1l) + i_p1m + i_p1n
g neuroticism = (5-i_p1e) + (5-i_p1g) + i_p1h

* having a sibling/relative in TTI
* 1 if either x13b or x14b holds a substantive non-zero answer.
* In Stata a missing value satisfies both x~=0 and x~=99, so without the
* explicit !missing() guards a non-respondent would be coded 1. Missing is
* treated like code 99 here (i.e. 0).
* NOTE(review): this changes a balance covariate if x13b/x14b contain
* missing values, which can change the seed selected below.
g relatives_tti = (x13b~=0 & x13b~=99 & !missing(x13b)) | (x14b~=0 & x14b~=99 & !missing(x14b))

gl balancetest sex age stream_art stream_com stream_sci i_c1a i_c1b i_c1c c2 c5c1 c5c2 c5c3 c5c4 c5c5 c5c6 c5c7 i_d4d i_f8b i_f9g extroversion conscientiousness openness agreeableness neuroticism wealth_index relatives_tti

******************************* randomize students *****************************
* Re-randomization:
*   1. for every candidate seed in $randomseed, draw a within-school random
*      ordering and treat the first nmentor*5 students of each school;
*   2. t-test every covariate in $balancetest between the two arms and
*      discard the draw if any stacked |t| exceeds 3.5;
*   3. among the surviving draws keep the seed whose largest absolute
*      normalized difference is smallest;
*   4. re-draw with that seed to produce the final treatstudent_<seed>.
*
* Step 4 only reproduces the draw tested in step 2 if the observations are
* in exactly the same order each time runiform() is called. `bys schoolname:'
* alone leaves the within-school order unspecified, so a unique, fixed row
* index n is created once and the data are sorted on (schoolname n) before
* every draw.
* assumes studentid identifies a student within school - TODO confirm, e.g.
* with `isid schoolname studentid'
* NOTE(review): fixing the sort order changes the realized assignment
* relative to earlier runs of this file.
cap drop treatstudent_*

sort schoolname studentid, stable
cap drop n
g long n = _n

foreach j of global randomseed {
	sort schoolname n
	set seed `j'
	by schoolname: g rand_num = runiform()
	bys schoolname: egen ordering = rank(rand_num)
	bys schoolname: g treatstudent_`j' = ordering <= (nmentor*5)
	drop rand_num ordering

	* balance check: treat vs. control students
	* one 1x2 row per covariate: (t statistic, normalized difference)
	foreach variable of global balancetest {
		qui ttest `variable', by(treatstudent_`j')
		mat t`j' = r(t)
		scalar nor`j' = (r(mu_1) - r(mu_2))/sqrt(0.5*(r(sd_1)^2 + r(sd_2)^2))
		mat nor_diff`j' = (`=scalar(nor`j')')
		mat R`j'`variable' = t`j', nor_diff`j'
	}

	* Stack the rows with the row-join operator "\" ("/" is division by a
	* scalar and is not conformable here).
	* NOTE(review): c2 is in $balancetest but is not stacked, so it is
	* tested above but ignored by the filter - confirm this is intended.
	mat R`j' = R`j'sex \ R`j'age ///
		\ R`j'stream_art \ R`j'stream_com \ R`j'stream_sci ///
		\ R`j'i_c1a \ R`j'i_c1b \ R`j'i_c1c ///
		\ R`j'c5c1 \ R`j'c5c2 \ R`j'c5c3 \ R`j'c5c4 \ R`j'c5c5 \ R`j'c5c6 \ R`j'c5c7 ///
		\ R`j'i_d4d \ R`j'i_f8b \ R`j'i_f9g ///
		\ R`j'extroversion \ R`j'conscientiousness \ R`j'openness \ R`j'agreeableness \ R`j'neuroticism ///
		\ R`j'wealth_index \ R`j'relatives_tti
	qui svmat double R`j'

	rename R`j'1 t_`j'
	rename R`j'2 NorDiff_`j'

	* Discard this draw if any |t| > 3.5. An all-missing t column also
	* discards it, because missing compares greater than any number.
	tempvar abs_t
	qui g double `abs_t' = abs(t_`j')
	qui summ `abs_t'
	if r(max) > 3.5 {
		drop treatstudent_`j' t_`j' NorDiff_`j'
	}
	drop `abs_t'
}


* keep the result with minimized maximum abs(normalized difference)
cap unab candidates : NorDiff_*
if _rc {
	di as error "no candidate randomization passed the |t| <= 3.5 filter"
	exit 498
}

scalar best_maxdiff = .
local seed
foreach v of local candidates {
	tempvar abs_nd
	qui g double `abs_nd' = abs(`v')
	qui summ `abs_nd'
	scalar this_maxdiff = r(max)
	* "<=" keeps the last candidate on an exact tie; the second condition
	* skips candidates with no non-missing normalized difference
	if scalar(this_maxdiff) <= scalar(best_maxdiff) & scalar(this_maxdiff) < . {
		scalar best_maxdiff = scalar(this_maxdiff)
		* variable name is NorDiff_<seed>: the seed starts at character 9
		local seed = substr("`v'", 9, .)
	}
	drop `abs_nd'
}
if "`seed'" == "" {
	di as error "normalized differences are missing for every remaining candidate"
	exit 498
}

local j = `seed'
di as text "chosen randomization seed: `j'"

* drops the school-level treat flag too, as well as all candidate assignments
cap drop treat* NorDiff_* 
cap drop  t_*

* export the result with the chosen seed: same sort, same seed, same draw
sort schoolname n
set seed `j'
cap drop rand_num ordering
by schoolname: g rand_num = runiform()
bys schoolname: egen ordering = rank(rand_num)
bys schoolname: g treatstudent_`j' = ordering <= (nmentor*5)
drop rand_num ordering

* leave the data in row-index order for the code that follows
sort schoolname n

****** balance check: treat vs. control students
* Builds the balance table for the chosen seed `j' and writes it to CSV.
* Per covariate the row holds: N and mean of ttest group 1, N and mean of
* ttest group 2, difference in means, t statistic, normalized difference.
* Group 1 is treatstudent==0 and group 2 is treatstudent==1.
foreach variable of global balancetest {
	qui ttest `variable', by(treatstudent_`j')
	mat Cn`j' = r(N_1)
	mat Tn`j' = r(N_2)
	mat C`j' = r(mu_1)
	mat T`j' = r(mu_2)
	mat diff`j' = r(mu_1) - r(mu_2)
	mat t`j' = r(t)
	scalar nor`j' = (r(mu_1) - r(mu_2))/sqrt(0.5*(r(sd_1)^2 + r(sd_2)^2))
	mat nor_diff`j' = (`=scalar(nor`j')')

	mat R`j'`variable' = Cn`j', C`j', Tn`j', T`j', diff`j', t`j', nor_diff`j'
}

* Stack the rows with the row-join operator "\" ("/" is division by a scalar
* and is not conformable here).
* NOTE(review): c2 is in $balancetest but is not stacked - confirm intended.
mat R`j' = R`j'sex \ R`j'age ///
	\ R`j'stream_art \ R`j'stream_com \ R`j'stream_sci ///
	\ R`j'i_c1a \ R`j'i_c1b \ R`j'i_c1c ///
	\ R`j'c5c1 \ R`j'c5c2 \ R`j'c5c3 \ R`j'c5c4 \ R`j'c5c5 \ R`j'c5c6 \ R`j'c5c7 ///
	\ R`j'i_d4d \ R`j'i_f8b \ R`j'i_f9g ///
	\ R`j'extroversion \ R`j'conscientiousness \ R`j'openness \ R`j'agreeableness \ R`j'neuroticism ///
	\ R`j'wealth_index \ R`j'relatives_tti

qui svmat double R`j'
rename R`j'1 CN_`j'
rename R`j'2 Cmean_`j'
rename R`j'3 TN_`j'
rename R`j'4 Tmean_`j'
rename R`j'5 Diff_`j'
rename R`j'6 t_`j'
rename R`j'7 NorDiff_`j'

* format only the table columns; other variables are left alone
format Cmean_`j' Tmean_`j' Diff_`j' t_`j' NorDiff_`j' %10.3f
format CN_`j' TN_`j' %10.0f

preserve
* svmat wrote matrix row k into observation k of the current order, so the
* row index is taken from the current order. That keeps the labels below
* aligned with the matrix rows whatever the earlier sort state was. The
* change to n is undone by restore.
cap g n = _n
replace n = _n

* keep only the table columns so no student-level variable can leak into the file
keep n CN_`j' Cmean_`j' TN_`j' Tmean_`j' Diff_`j' t_`j' NorDiff_`j'

* Row labels follow the stacking order above: sex is row 1, age is row 2.
g variable=""
replace variable="male" if n==1
replace variable="age" if n==2
replace variable="Art stream" if n==3
replace variable="Commerce stream" if n==4
replace variable="Science stream" if n==5
replace variable="performance Math" if n==6
replace variable="performance Dzongkha" if n==7
replace variable="performance English" if n==8
replace variable="consult w parents about educareer" if n==9
replace variable="consult w siblings about educareer" if n==10
replace variable="consult w relatives about educareer" if n==11
replace variable="consult w friends about educareer" if n==12
replace variable="consult w neighbors about educareer" if n==13
replace variable="consult w teachers about educareer" if n==14
replace variable="consult w others about educareer" if n==15
replace variable="bias about blue-collar jobs" if n==16
replace variable="knowledge about TTI education" if n==17
replace variable="ranking of TTI degree" if n==18
replace variable="Big5 extroversion" if n==19
replace variable="Big5 conscientiousness" if n==20
replace variable="Big5 openness" if n==21
replace variable="Big5 agreeableness" if n==22
replace variable="Big5 neuroticism" if n==23
replace variable="wealth index" if n==24
replace variable="having relatives studying/having TTI degrees" if n==25

* Row 26 becomes a leading "randomization seed" row. Its cells are empty;
* the seed itself is the numeric suffix of every column name.
keep if n<=26
replace n=0 if n==26
sort n
order variable
replace variable="randomization seed" if n==0
drop n

export delimited using "$randomization/randomization_student_TVET_mainlist_balance.csv", nolabel datafmt replace
restore 

* Strip the seed suffix: the single remaining treatstudent_<seed> becomes treatstudent
foreach assignvar of varlist treatstudent* {
	rename `assignvar' treatstudent
}

* Main list: treated students only, written to Excel for the field team.
* The full dataset in memory is restored afterwards.
preserve 
keep if treatstudent == 1
sort schoolname stream sex studentid 
capture keep schoolname studentid name sex b_* stream phone
capture label define stream 1 "Arts" 2 "Commerce" 3 "Science" 4 "Rigzhung"
capture label values stream stream
order schoolname stream name studentid sex b_* stream phone
export excel using "$randomization/randomization_student_TVET_mainlist.xlsx", replace firstrow(variables)
restore 

* Save all students in treated schools, with their assignment
save "$randomization/student_TVET_treatschools.dta", replace

/********************************************************************************
******* waitlist: randomly chosen from control students: one per mentor ********
********************************************************************************
keep if treatstudent==0

foreach j of global randomseed {
	set seed `j'
	cap drop waitlist 
	bys schoolname: g rand_num=runiform()
	bys schoolname: egen ordering=rank(rand_num)
	bys schoolname: g waitlist`j' = ordering<=(nmentor)
	cap drop rand_num ordering


cap g n=_n

* balance check: waitlist and non-waitlist
foreach variable of global balancetest {
qui ttest `variable', by(waitlist`j')
mat t`j' = r(t)
scalar nor`j'=(r(mu_1) - r(mu_2))/sqrt(0.5*(r(sd_1)^2 + r(sd_2)^2))
mat nor_diff`j' = (`=scalar(nor`j')')
mat R`j'`variable' = t`j', nor_diff`j'
}

mat R`j'=R`j'sex/R`j'age/R`j'stream_art/R`j'stream_com/R`j'stream_sci/R`j'i_c1a/R`j'i_c1b/R`j'i_c1c/R`j'c5c1/R`j'c5c2/R`j'c5c3/R`j'c5c4/R`j'c5c5/R`j'c5c6/R`j'c5c7/R`j'i_d4d/R`j'i_f8b/R`j'i_f9g/R`j'extroversion/R`j'conscientiousness/R`j'openness/R`j'agreeableness/R`j'neuroticism/R`j'wealth_index/R`j'relatives_tti
qui svmat double R`j'

rename R`j'1 t_`j'
rename R`j'2 NorDiff_`j'

* remove randomization results with	
	* statistically significant difference in means
	cap g tabs_`j'=abs(t_`j')
	cap qui summ tabs_`j'
	if r(max)>1.96 {
		cap drop *_`j'
	}
	cap drop tabs_`j'
}

* keep the result with minimized maximum abs(normalized difference)
preserve 
keep NorDiff* 
foreach v of varlist NorDiff* {
	cap g abs`v'=abs(`v')
	cap egen max_`v' = max(abs`v')
	drop abs`v'
}
egen minmax = rowmin(max*)

foreach v of varlist NorDiff* {
if max_`v' ~= minmax {
	cap drop `v' max_`v'
}
}
cap drop minmax
foreach var of varlist NorDiff* {
local seed = substr("`var'", 9, .)
}

local j = `seed'
restore 

drop waitlist* t_* NorDiff_* 

* Compare means and compute normalized difference
set seed `j'

bys schoolname: g rand_num=runiform()
bys schoolname: egen ordering=rank(rand_num)
bys schoolname: g waitlist`j' = ordering<=(nmentor*5)
cap drop rand_num ordering

****** balance check: waitlist vs. non-waitlist students
foreach variable of global balancetest {
qui ttest `variable', by(waitlist`j')
mat Cn`j' = r(N_1)
mat Tn`j' = r(N_2)
mat C`j' = r(mu_1)
mat T`j' = r(mu_2)
mat diff`j' = r(mu_1) - r(mu_2)
mat diff`j' = r(mu_1) - r(mu_2)
mat t`j' = r(t)
scalar nor`j'=(r(mu_1) - r(mu_2))/sqrt(0.5*(r(sd_1)^2 + r(sd_2)^2))
mat nor_diff`j' = (`=scalar(nor`j')')

mat R`j'`variable' = Cn`j',C`j',Tn`j', T`j',diff`j',t`j', nor_diff`j'
}

mat R`j'=R`j'sex/R`j'age/R`j'stream_art/R`j'stream_com/R`j'stream_sci/R`j'i_c1a/R`j'i_c1b/R`j'i_c1c/R`j'c5c1/R`j'c5c2/R`j'c5c3/R`j'c5c4/R`j'c5c5/R`j'c5c6/R`j'c5c7/R`j'i_d4d/R`j'i_f8b/R`j'i_f9g/R`j'extroversion/R`j'conscientiousness/R`j'openness/R`j'agreeableness/R`j'neuroticism/R`j'wealth_index/R`j'relatives_tti

qui svmat double R`j'
rename R`j'1 CN_`j'
rename R`j'2 Cmean_`j'
rename R`j'3 TN_`j'
rename R`j'4 Tmean_`j'
rename R`j'5 Diff_`j'
rename R`j'6 t_`j'
rename R`j'7 NorDiff_`j'

cap format C* T* Di* t_* NorDiff* %10.3f
cap format CN* TN* %10.0f

preserve
sort n
cap drop schoolname
cap drop responseid-relatives_tti
cap drop variable
g variable=""
replace variable="age" if n==1
replace variable="male" if n==2
replace variable="Art stream" if n==3
replace variable="Commerce stream" if n==4
replace variable="Science stream" if n==5
replace variable="performance Math" if n==6
replace variable="performance Dzongkha" if n==7
replace variable="performance English" if n==8
replace variable="consult w parents about educareer" if n==9
replace variable="consult w siblings about educareer" if n==10
replace variable="consult w relatives about educareer" if n==11
replace variable="consult w friends about educareer" if n==12
replace variable="consult w neighbors about educareer" if n==13
replace variable="consult w teachers about educareer" if n==14
replace variable="consult w others about educareer" if n==15
replace variable="bias about blue-collar jobs" if n==16
replace variable="knowledge about TTI education" if n==17
replace variable="ranking of TTI degree" if n==18
replace variable="Big5 extroversion" if n==19
replace variable="Big5 conscientiousness" if n==20
replace variable="Big5 openness" if n==21
replace variable="Big5 agreeableness" if n==22
replace variable="Big5 neuroticism" if n==23
replace variable="wealth index" if n==24
replace variable="having relatives studying/having TTI degrees" if n==25
keep if n<=26
replace n=0 if n==26
sort n
order variable
replace variable="randomization seed" if n==0
cap drop n 
cap drop treat*

export delimited using "$randomization/randomization_student_TVET_waitlist_balance.csv", nolabel datafmt replace
restore 

foreach v of varlist waitlist* {
	rename `v' waitlist
}

preserve 
keep if waitlist==1
sort schoolname stream sex studentid 
cap keep schoolname studentid name sex b_* stream phone
cap lab def stream 1 "Arts" 2 "Commerce" 3 "Science" 4 "Rigzhung"
cap lab val stream stream
order schoolname stream name studentid sex b_* stream phone
export excel using "$randomization/randomization_student_TVET_waitlist.xlsx", replace firstrow(variables)
restore 
*/